Source code for hysop.backend.device.opencl.opencl_tools

# Copyright (c) HySoP 2011-2024
#
# This file is part of HySoP software.
# See "https://particle_methods.gricad-pages.univ-grenoble-alpes.fr/hysop-doc/"
# for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Classes and tools used to handle the OpenCL backend.

* :class:`~hysop.backend.device.opencl.opencl_env.OpenClEnvironment`:
   object handling opencl platform, device ... info.
* :func:`~hysop.backend.device.opencl.opencl_tools.get_context`:
   build or get an OpenCL context, optionally with openGL sharing properties.
* :func:`~hysop.backend.device.opencl.opencl_tools.get_or_create_opencl_env`:
   build or get an OpenCL environment.
* :func:`~hysop.backend.device.opencl.opencl_tools.explore`:
   explore system and display platform, devices, memory ... info.



"""

import sys, os, re, itertools, hashlib, gzip, hashlib

from hysop import (
    __VERBOSE__,
    __KERNEL_DEBUG__,
    __DEFAULT_PLATFORM_ID__,
    __DEFAULT_DEVICE_ID__,
)
from hysop import vprint

from hysop.backend.device.opencl import cl, __OPENCL_PROFILE__
from hysop.constants import np, Precision, DeviceType, HYSOP_REAL
from hysop.core.mpi import MPI
from hysop.tools.parameters import MPIParams
from hysop.tools.io_utils import IO
from hysop.tools.decorators import static_vars
from hysop.tools.htypes import check_instance, to_tuple, first_not_None


class KernelError(Exception):
    """Custom exception for kernel errors.

    Parameters
    ----------
    msg : str
        Human readable description of the failure.
    err : str
        Short error identifier, used as prefix by :meth:`__str__`.

    Attributes
    ----------
    msg : str
        The description passed at construction.
    err : str
        The error identifier passed at construction.
    """

    def __init__(self, msg, err):
        super().__init__(msg)
        self.msg = msg
        self.err = err

    def __str__(self):
        # Formatting (instead of '+') keeps this usable when err is not a str,
        # e.g. when an exception instance or an error code is passed.
        return f"{self.err}: {self.msg}"
def explore():
    """Scan system and print OpenCL environment details.

    For every platform returned by ``cl.get_platforms()`` the platform
    attributes listed in ``platforms_info`` are printed, followed by the
    attributes listed in ``devices_info`` for each device of that platform.
    """
    print("OpenCL exploration : ")
    platforms = cl.get_platforms()
    platforms_info = ["name", "version", "vendor", "profile", "extensions"]
    devices_info = [
        "name",
        "version",
        "vendor",
        "profile",
        "extensions",
        "available",
        "type",
        "compiler_available",
        "double_fp_config",
        "single_fp_config",
        "global_mem_size",
        "global_mem_cache_type",
        "global_mem_cache_size",
        "global_mem_cacheline_size",
        "local_mem_size",
        "local_mem_type",
        "max_clock_frequency",
        "max_compute_units",
        "max_constant_buffer_size",
        "max_mem_alloc_size",
        "max_work_group_size",
        "max_work_item_dimensions",
        "max_work_item_sizes",
        "preferred_vector_width_double",
        "preferred_vector_width_float",
        "preferred_vector_width_int",
    ]
    for pltfm in platforms:
        print("Platform:", pltfm.name)
        for pltfm_info in platforms_info:
            # Plain attribute lookup; no need to eval() a built expression.
            print(" |-", pltfm_info, ":", getattr(pltfm, pltfm_info))
        for dvc in pltfm.get_devices():
            print(" |- Device:", dvc.name)
            for dvc_info in devices_info:
                print(" |-", dvc_info, ":", getattr(dvc, dvc_info))
def convert_device_type(device_type):
    """Translate a hysop ``DeviceType`` into the matching ``cl.device_type``.

    Parameters
    ----------
    device_type : DeviceType or None
        hysop device type to translate.

    Returns
    -------
    The corresponding ``cl.device_type`` value, or None when
    ``device_type`` is None.

    Raises
    ------
    ValueError
        If ``device_type`` has no known OpenCL counterpart.
    """
    if device_type is None:
        return None
    check_instance(device_type, DeviceType)

    hysop_to_opencl = {
        DeviceType.ALL: cl.device_type.ALL,
        DeviceType.ACCELERATOR: cl.device_type.ACCELERATOR,
        DeviceType.CPU: cl.device_type.CPU,
        DeviceType.GPU: cl.device_type.GPU,
        # DeviceType.CUSTOM: cl.device_type.CUSTOM,
        DeviceType.DEFAULT: cl.device_type.DEFAULT,
    }

    if device_type in hysop_to_opencl:
        return hysop_to_opencl[device_type]

    raise ValueError(f"Unknown device type {device_type}.")
def convert_precision(precision):
    """Translate a hysop ``Precision`` into the matching numpy dtype.

    Parameters
    ----------
    precision : Precision or None
        hysop precision to translate.

    Returns
    -------
    The corresponding numpy floating point type, or None when
    ``precision`` is None.

    Raises
    ------
    ValueError
        If ``precision`` is ``Precision.SAME`` or has no known numpy
        counterpart.
    RuntimeError
        If ``precision`` is ``Precision.QUAD``.
    """
    if precision is None:
        return None
    check_instance(precision, Precision)

    # These two values are rejected before any table lookup.
    if precision == Precision.SAME:
        raise ValueError("Cannot convert Precision.SAME to numpy dtype.")
    if precision == Precision.QUAD:
        raise RuntimeError(
            "Numpy does not support the 128-bit IEEE quad precision data type."
        )

    # TODO when long double will be supported check if device has
    # np.float96 or np.float128 long doubles
    # (ie padded to 3*32bits or 2*64bits)
    precision_to_dtype = {
        Precision.DEFAULT: HYSOP_REAL,
        Precision.LONG_DOUBLE: np.longdouble,
        Precision.DOUBLE: np.float64,
        Precision.FLOAT: np.float32,
        Precision.HALF: np.float16,
    }

    if precision in precision_to_dtype:
        return precision_to_dtype[precision]

    raise ValueError(f"Unknown precision {precision}.")
@static_vars(opencl_environments=dict())
def get_device_number(platform_id=None):
    """Return the number of devices exposed by an OpenCL platform.

    Parameters
    ----------
    platform_id : int, optional
        OpenCL platform ID. Falls back to ``__DEFAULT_PLATFORM_ID__``
        when None.

    Notes
    -----
    The platform is looked up in strict mode, so the error raised by
    :func:`get_platform` for an invalid ``platform_id`` propagates.
    NOTE(review): the ``opencl_environments`` static attribute attached by
    the decorator is not read by this function.
    """
    chosen_id = first_not_None(platform_id, __DEFAULT_PLATFORM_ID__)
    devices = get_platform(chosen_id, strict=True).get_devices()
    return len(devices)
@static_vars(opencl_environments=dict())
def get_or_create_opencl_env(
    mpi_params,
    platform_id=None,
    device_id=None,
    device_type=None,
    gl_sharing=False,
    **kargs,
):
    """
    Return the OpenClEnvironment matching the given parameters, creating it
    if it does not already exist.

    All environments are kept alive (cached) in a dictionary local to this
    function (ie. all opencl operators can share the same OpenClEnvironment).

    Parameters
    ----------
    mpi_params : MPIParams
        MPI parameters of the environment.
    platform_id : int, optional
        OpenCL platform ID, defaults to ``__DEFAULT_PLATFORM_ID__``.
    device_id : int, optional
        OpenCL device ID, defaults to ``__DEFAULT_DEVICE_ID__``.
    device_type : DeviceType, optional
        Device type, defaults to ``DeviceType.ALL``.
    gl_sharing : bool, optional
        Forwarded to ``OpenClEnvironment``, defaults to False.
    kargs : dict
        Extra keyword arguments forwarded to ``OpenClEnvironment``.

    Notes
    -----
    The extra keyword arguments are not part of the cache key: they are only
    used when the environment is created for the first time.
    """
    platform_id = first_not_None(platform_id, __DEFAULT_PLATFORM_ID__)
    device_id = first_not_None(device_id, __DEFAULT_DEVICE_ID__)
    device_type = first_not_None(device_type, DeviceType.ALL)

    check_instance(mpi_params, MPIParams)
    check_instance(platform_id, int)
    check_instance(device_id, int)
    check_instance(device_type, DeviceType, allow_none=True)
    check_instance(gl_sharing, bool)

    cache = get_or_create_opencl_env.opencl_environments
    cache_key = (mpi_params, platform_id, device_id, device_type, gl_sharing)

    if cache_key not in cache:
        # Imported only when a new environment actually has to be built.
        from hysop.backend.device.opencl.opencl_env import OpenClEnvironment

        cache[cache_key] = OpenClEnvironment(
            platform_id=platform_id,
            device_id=device_id,
            device_type=device_type,
            gl_sharing=gl_sharing,
            mpi_params=mpi_params,
            **kargs,
        )

    return cache[cache_key]
def create_queue(ctx, props=None):
    """Return an OpenCL command queue built on a context.

    Parameters
    ----------
    ctx : cl.Context
        OpenCL context the queue is attached to.
    props : optional
        Command queue properties. When None, profiling is enabled if
        ``__OPENCL_PROFILE__`` is set, otherwise the queue is created
        without explicit properties.

    Returns
    -------
    cl.CommandQueue
    """
    if props is None and __OPENCL_PROFILE__:
        props = cl.command_queue_properties.PROFILING_ENABLE

    if props is None:
        return cl.CommandQueue(ctx)

    # The second positional parameter of cl.CommandQueue is the device, so
    # the properties have to be passed by keyword.
    return cl.CommandQueue(ctx, properties=props)
def get_work_items(resolution, vector_width=1):
    """Choose the work-item number and the OpenCL index spaces.

    Parameters
    ----------
    resolution : tuple
        local mesh resolution
    vector_width : int
        OpenCL vector types width

    Returns
    -------
    int : work-item number
    tuple : global space index
    tuple : local space index

    Raises
    ------
    ValueError
        If the work-item number has to be reduced to fit ``vector_width``
        and ``resolution[0]`` is not a multiple of ``vector_width``.

    Notes
    -----
    Targets 64 work-items in 3D and 256 otherwise, capped by the smallest
    resolution. The number is then halved until it divides
    ``resolution[0]`` and finally reduced so that
    work-item number * vector_width does not exceed ``resolution[0]``.
    """
    is_3d = len(resolution) == 3
    first_dim = resolution[0]

    # Preferred work-item number, capped by the smallest direction.
    work_items = min(64 if is_3d else 256, min(resolution))

    # Adapt the work-item number to the problem size.
    if first_dim % work_items > 0:
        print("Warning : GPU best performances obtained for")
        if is_3d:
            print("problem sizes multiples of 64")
        else:
            print("problem sizes multiples of 256")
        while first_dim % work_items > 0:
            work_items //= 2

    # Adapt the work-item number to the vector width.
    if work_items * vector_width > first_dim:
        if first_dim % vector_width > 0:
            raise ValueError(
                "Resolution ({}) must be a multiple of {}".format(
                    first_dim, vector_width
                )
            )
        work_items = first_dim // vector_width

    if is_3d:
        trailing = (int(resolution[1]), int(resolution[2]))
    else:
        trailing = (int(resolution[1]),)

    gwi = (int(work_items),) + trailing
    lwi = (int(work_items),) + (1,) * len(trailing)
    return work_items, gwi, lwi
def get_platform(platform_id, strict):
    """Return an OpenCL platform.

    Parameters
    ----------
    platform_id : int
        OpenCL platform ID, must not be None.
    strict : bool
        If set to true, re-raise the IndexError when the platform does
        not exist. Else fall back to the first available platform.
    """
    assert platform_id is not None
    try:
        return cl.get_platforms()[platform_id]
    except IndexError:
        available = cl.get_platforms()
        fallback = available[0]
        msg = (
            f" Incorrect platform_id : {platform_id}"
            f" Only {len(available)} are available."
        )
        if strict:
            print(msg + "\n FATAL ERROR: Strict platform_id condition violated.\n")
            raise
        vprint(msg + f" --> getting default platform {fallback.name}.")
        return fallback
def get_device(platform, device_id, device_type, strict):
    """Return an OpenCL device.

    Parameters
    ----------
    platform : cl.Platform
        chosen platform.
    device_id : int
        chosen device id.
    device_type : cl.device_type
        chosen OpenCL device type; ``cl.device_type.ALL`` disables the
        filtering by type.
    strict : bool
        If set to true, re-raise the error when the device cannot be
        obtained. Else fall back to the first platform device.

    Returns
    -------
    The selected device, or the first device of the platform when the
    lookup failed and ``strict`` is False.
    """
    try:
        if device_type != cl.device_type.ALL:
            devices = platform.get_devices(device_type)
            device = devices[device_id]
        else:
            device = platform.get_devices()[device_id]
    except IndexError:
        msg = f"\nIncorrect device_id {device_id}"
        msg += f"\nThere is only {len(platform.get_devices())} devices available."
        if strict:
            msg += "\nFATAL ERROR: Strict device_id condition violated.\n"
            print(msg)
            raise
        msg += f"\nGetting first device of type {device_type}."
        vprint(msg)
        device = platform.get_devices()[0]
    except Exception:
        # Deliberately not a bare 'except': KeyboardInterrupt and SystemExit
        # must not be turned into a silent fallback in non-strict mode.
        msg = f"\nCould not get a device of type {device_type}"
        if strict:
            msg += "\nFATAL ERROR: Strict device_type condition violated.\n"
            vprint(msg)
            raise
        msg += "\nGetting first device in platform."
        vprint(msg)
        device = platform.get_devices()[0]
    return device
@static_vars(contexts={})
def get_context(devices, gl_sharing):
    """Return an OpenCL context, creating and caching it on first use.

    Parameters
    ----------
    devices :
        OpenCL device or tuple of devices which handles the context.
    gl_sharing : bool
        True to build a context shared between OpenGL and OpenCL.

    Notes
    -----
    Contexts are cached in a dictionary local to this function, keyed by
    the tuple of devices and the ``gl_sharing`` flag: calling this function
    twice with the same arguments returns the same context.
    """
    devices = to_tuple(devices)
    contexts = get_context.contexts
    key = devices + (gl_sharing,)
    if key in contexts:
        return contexts[key]

    if gl_sharing:
        from pyopencl.tools import get_gl_sharing_context_properties

        if sys.platform == "darwin":
            props = get_gl_sharing_context_properties()
        else:
            # Some OSs prefer clCreateContextFromType, some prefer
            # clCreateContext. Try both.
            # This is a free function: the platform is taken from the first
            # device, there is no 'self' to read it from.
            props = [
                (cl.context_properties.PLATFORM, devices[0].platform)
            ] + get_gl_sharing_context_properties()
        ctx = cl.Context(properties=props, devices=devices)
    else:
        print("PyOpenCL Context devices:", devices)
        ctx = cl.Context(devices=devices)

    contexts[key] = ctx
    return ctx
def parse_opencl_file(f, n=8, nb_remesh_components=1):
    """Parse OpenCL sources and expand the vector and remeshing flags.

    Parameters
    ----------
    f : file-like object
        open text file (anything providing ``readlines()``) containing
        the OpenCL sources.
    n : int, optional
        vector width, default=8
    nb_remesh_components : int
        number of remeshed components

    Returns
    -------
    string, the parsed sources.

    Notes
    -----
    * __N__ is expanded as an integer corresponding to vector width.
    * A line containing __NN__ is duplicated to operate on each vector
      component:

      * if the line ends with ';', the whole instruction is duplicated.
      * if the line ends with ',' and contains '(float__N__)(', the
        element following the parenthesis is duplicated.

      The line terminator is ignored when looking at the end of a line, so
      a last line without trailing newline is expanded as well.
    * Remeshing fields components are expanded as follows:
      an instruction flagged with '__RCOMP_I' (up to the next ';') is
      duplicated once per component, a parameter flagged with '__RCOMP_P'
      (up to the next ',' or ')') is duplicated once per component and the
      copies are separated with ', '. In the duplicated code, the flag
      '__ID__' is replaced by the index of the component.

    Examples with a 4-width vector code::

        float__N__ x;               ->  float4 x;

        x.s__NN__ = 1.0f;           ->  x.s0 = 1.0f;
                                        x.s1 = 1.0f;
                                        x.s2 = 1.0f;
                                        x.s3 = 1.0f;

        v = (float__N__)(a.s__NN__, ->  v = (float4)(a.s0,a.s1,a.s2,a.s3

    Examples with a 2 components expansion code::

        __RCOMP_I var__ID__[i] = 0.0;  ->  var0[i] = 0.0; var1[i] = 0.0;

        aFunction(__RCOMP_P var__ID__, __RCOMP_P other__ID__);
        ->  aFunction( var0,  var1,  other0,  other1);
    """
    # replacement for floatN elements
    vec_floatn = re.compile(r"\(float__N__\)\(")
    vec_nn = re.compile(r"__NN__")
    vec_n = re.compile(r"__N__")

    expanded = []
    for line in f.readlines():
        # Last character of the line, line terminator excluded. Looking at
        # line[-2] would miss a final line that has no trailing newline.
        content = line[:-1] if line.endswith("\n") else line
        last_char = content[-1:]
        has_nn = vec_nn.search(line) is not None

        if has_nn and last_char == "," and vec_floatn.search(line):
            # Expand floatN items
            parts = line.split("(float__N__)(")
            element = parts[1].rsplit(",", 1)[0]
            items = ",".join(vec_nn.sub(str(i), element) for i in range(n))
            line = parts[0] + "(float" + str(n) + ")(" + items + "\n"
        elif has_nn and last_char == ";":
            # Expand floatN elements access
            line = "".join(vec_nn.sub(str(i), line) for i in range(n))

        # Replace vector length
        expanded.append(vec_n.sub(str(n), line))
    src = "".join(expanded)

    # Replacement for remeshed components
    # __RCOMP_I ...;
    re_instr = re.compile(r"__RCOMP_I([\w\s\.,()\[\]+*/=-]+;)")
    # __RCOMP_P ..., or __RCOMP_P ...)
    re_param = re.compile(r"__RCOMP_P([\w\s\.\[\]+*/=-]+(?=,|\)))")

    def repl_instruction(m):
        return "".join(
            m.group(1).replace("__ID__", str(i)) for i in range(nb_remesh_components)
        )

    def repl_parameter(m):
        return ", ".join(
            m.group(1).replace("__ID__", str(i)) for i in range(nb_remesh_components)
        )

    src = re_instr.sub(repl_instruction, src)
    src = re_param.sub(repl_parameter, src)
    return src